#!/usr/bin/env python
# -*- coding=utf-8 -*-

# This script processes the contents of a refGene file so they can be used for plotting.
import sys

def _split_coords(field):
    """Return the non-empty, whitespace-stripped items of a comma-separated field."""
    stripped = [item.strip() for item in field.split(',')]
    return [item for item in stripped if item]


def parse_refgene(refgene_file,ofile):
    """
    Flatten a refGene table into one tab-separated row per exon.

    Every data line of ``refgene_file`` is split on tabs and read by
    0-based column position: 1 -> ``refseq``, 2 -> ``chrom``,
    3 -> ``strand``, 4 -> ``txstart``, 5 -> ``txend``, 9 and 10 -> the
    comma-separated exon start and end lists, 12 -> ``gene``.  For each
    (start, end) pair one row is written to ``ofile`` after a fixed header
    line; the ``feature`` column is always ``exon``.  Values are copied as
    text, no numeric conversion is done.

    Blank lines and lines starting with ``#`` (e.g. a column-header line)
    are skipped instead of being parsed as records.

    Args:
        refgene_file: path of the tab-separated input file.
        ofile: path of the output file; it is created or overwritten.

    Raises:
        IndexError: if a data line has fewer than 13 tab-separated columns.
    """
    with open(refgene_file,'r') as indata,\
    open(ofile,'w') as odata:
        odata.write('gene\trefseq\tchrom\ttxstart\ttxend\tstart\tend\tfeature\tstrand\n')
        feature = 'exon'
        for line in indata:
            record = line.strip()
            # A blank line would split into a single field and crash on the
            # first column lookup; a '#' line is a comment/header, not data.
            if not record or record.startswith('#'):
                continue
            line_list = record.split('\t')
            NM = line_list[1]
            chrom = line_list[2]
            strand = line_list[3]
            txstart = line_list[4]
            txend = line_list[5]
            # The lists usually end with a trailing comma, hence the
            # empty-item filtering done by the helper.
            exon_starts = _split_coords(line_list[9])
            exon_ends = _split_coords(line_list[10])
            gene_name = line_list[12]
            # zip() pairs starts with ends positionally; if the two lists
            # differ in length the unpaired tail is dropped.
            for s,e in zip(exon_starts,exon_ends):
                oline = [gene_name,NM,chrom,txstart,txend,s,e,feature,strand]
                odata.write("\t".join(oline) + "\n")
    print('output plot config file to {}'.format(ofile))


if __name__ == "__main__":
    # Command-line entry point: argv[1] is the refGene input file and
    # argv[2] is the output path. Any further arguments are ignored.
    if len(sys.argv) < 3:
        # Exit with a usage message (written to stderr, exit status 1)
        # instead of an IndexError traceback when arguments are missing.
        sys.exit('usage: {} <refgene_file> <output_file>'.format(sys.argv[0]))
    infile = sys.argv[1]
    ofile = sys.argv[2]
    parse_refgene(infile,ofile)
